# pip install pypdf
from typing import List

from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document

from model.MyOllamaEmbeddings import MyOllamaEmbeddings

# Source PDF for the demo. The path is relative, so it is resolved against the
# current working directory of the process.
# Alternative sample document: "../resource/functional_design.pdf"
file_path = "../resource/feature-history.pdf"
# The PDF is read when this module is imported/executed; `pages` is the
# document list that is handed to pdf_search() in the __main__ section.
loader = PyPDFLoader(file_path)
pages = loader.load_and_split()


def pdf_search(
    page_list: List[Document],
    query: str,
    *,
    k: int = 2,
    model: str = "nomic-embed-text",
    preview_chars: int = 300,
) -> List[Document]:
    """Print and return the documents in ``page_list`` most similar to ``query``.

    A FAISS index is built from ``page_list`` on every call, using
    ``MyOllamaEmbeddings`` for the vectors, and then queried with
    ``similarity_search``. Each hit is printed as a page header followed by
    a preview of its text and a separator line.

    Args:
        page_list: Documents to index and search.
        query: Search text passed to ``similarity_search``.
        k: Number of hits to request from the index (default 2).
        model: Embedding model name passed to ``MyOllamaEmbeddings``.
        preview_chars: Maximum number of characters of each hit to print.

    Returns:
        The documents returned by the similarity search, or an empty list
        when ``page_list`` is empty.
    """
    if not page_list:
        # Nothing to embed or search. Building a FAISS index from zero
        # documents is expected to fail, so report "no hits" instead.
        return []

    embeddings = MyOllamaEmbeddings(model=model)
    faiss_index = FAISS.from_documents(page_list, embeddings)
    hits = faiss_index.similarity_search(query, k=k)

    separator = "--" * 50
    for doc in hits:
        # Not every Document carries a "page" entry; fall back to "?" rather
        # than raising KeyError in the middle of printing the results.
        # NOTE(review): PyPDFLoader's "page" value is presumably 0-based, so
        # the first page prints as 0 - confirm whether 1-based is wanted.
        page = doc.metadata.get("page", "?")
        print(f"第{page}页:\n", doc.page_content[:preview_chars])
        print(separator)
    return hits


if __name__ == "__main__":
    # Demo run: search the loaded PDF chunks for a sample question.
    pdf_search(pages, query="FP是什么")
